Skip to content

[LoongArch] Enable LoopTermFold Pass #130737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

tangaac
Copy link
Contributor

@tangaac tangaac commented Mar 11, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Mar 11, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/130737.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp (+3-1)
  • (modified) llvm/test/CodeGen/LoongArch/opt-pipeline.ll (+1)
  • (modified) llvm/test/CodeGen/LoongArch/preferred-alignments.ll (+3-3)
  • (added) llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg (+2)
  • (added) llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll (+43)
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 62b08be5435cd..53b884563ad88 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -146,7 +146,9 @@ namespace {
 class LoongArchPassConfig : public TargetPassConfig {
 public:
   LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
-      : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {
+    EnableLoopTermFold = true;
+  }
 
   LoongArchTargetMachine &getLoongArchTargetMachine() const {
     return getTM<LoongArchTargetMachine>();
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index ab76d4e998d2b..c6c1c124c8314 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -44,6 +44,7 @@
 ; LAXX-NEXT:         Canonicalize Freeze Instructions in Loops
 ; LAXX-NEXT:         Induction Variable Users
 ; LAXX-NEXT:         Loop Strength Reduction
+; LAXX-NEXT:         Loop Terminator Folding
 ; LAXX-NEXT:       Basic Alias Analysis (stateless AA impl)
 ; LAXX-NEXT:       Function Alias Analysis Results
 ; LAXX-NEXT:       Merge contiguous icmps into a memcmp
diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
index c3618db646016..2e12d7ed9d13f 100644
--- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
+++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
@@ -10,13 +10,13 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s
 ; LA464-NEXT:  # %bb.1:
 ; LA464-NEXT:    move $a2, $zero
 ; LA464-NEXT:    bstrpick.d $a1, $a1, 31, 0
+; LA464-NEXT:    alsl.d $a1, $a1, $a0, 2
 ; LA464-NEXT:    .p2align 4, , 16
 ; LA464-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
 ; LA464-NEXT:    ld.w $a3, $a0, 0
-; LA464-NEXT:    add.w $a2, $a3, $a2
-; LA464-NEXT:    addi.d $a1, $a1, -1
 ; LA464-NEXT:    addi.d $a0, $a0, 4
-; LA464-NEXT:    bnez $a1, .LBB0_2
+; LA464-NEXT:    add.w $a2, $a3, $a2
+; LA464-NEXT:    bne $a0, $a1, .LBB0_2
 ; LA464-NEXT:  # %bb.3:
 ; LA464-NEXT:    move $a0, $a2
 ; LA464-NEXT:    ret
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
new file mode 100644
index 0000000000000..cc24278acbb41
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "LoongArch" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll
new file mode 100644
index 0000000000000..54fe262aca941
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=loop-reduce,loop-term-fold -mtriple=loongarch64 < %s | FileCheck %s
+
+define void @test(ptr %p, i8 %arg, i32 %start) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i8 [[ARG:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[ARG]] to i32
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START]], [[SHR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[ADD810:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[ADD810]] to i64
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[P]], i64 [[IDXPROM2]]
+; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT:    [[ADD]] = add i32 [[ADD810]], 1
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %conv = zext i8 %arg to i32
+  %shr = lshr i32 %conv, 1
+  %wide.trip.count = zext nneg i32 %shr to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %add810 = phi i32 [ %start, %entry ], [ %add, %for.body ]
+  %idxprom2 = zext i32 %add810 to i64
+  %arrayidx3 = getelementptr i8, ptr %p, i64 %idxprom2
+  %v = load i8, ptr %arrayidx3, align 1
+  %add = add i32 %add810, 1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the effect after enabling this pass?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that it's unnecessary to borrow this crash test from RISCV.

@tangaac
Copy link
Contributor Author

tangaac commented Mar 13, 2025

What's the effect after enabling this pass?

This removes one addi instruction from the loop body, at the cost of some extra setup code before the loop.
In most cases, this matches the code that GCC generates.

clang temp.c -S -O1

void foo(int *__restrict a, short int * __restrict b, int n) {
	for(int i = 0 ; i < n; i++ )
		a[i] = b[i];
}

before

# %bb.0:
	ori	$a3, $zero, 1
	blt	$a2, $a3, .LBB0_2
	.p2align	4, , 16
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
	ld.h	$a3, $a1, 0
	st.w	$a3, $a0, 0
	addi.d	$a0, $a0, 4
	addi.d	$a2, $a2, -1
	addi.d	$a1, $a1, 2
	bnez	$a2, .LBB0_1
.LBB0_2:
	ret

after

# %bb.0:                                # %entry
	ori	$a3, $zero, 1
	blt	$a2, $a3, .LBB0_3
# %bb.1:                                # %for.body.preheader
	alsl.d	$a2, $a2, $a0, 2
	.p2align	4, , 16
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
	ld.h	$a3, $a1, 0
	st.w	$a3, $a0, 0
	addi.d	$a0, $a0, 4
	addi.d	$a1, $a1, 2
	bne	$a0, $a2, .LBB0_2
.LBB0_3:                                # %for.cond.cleanup
	ret

@SixWeining
Copy link
Contributor

What's the effect after enabling this pass?

This removes one addi instruction from the loop body, at the cost of some extra setup code before the loop. In most cases, this matches the code that GCC generates.

clang temp.c -S -O1

void foo(int *__restrict a, short int * __restrict b, int n) {
	for(int i = 0 ; i < n; i++ )
		a[i] = b[i];
}

before

# %bb.0:
	ori	$a3, $zero, 1
	blt	$a2, $a3, .LBB0_2
	.p2align	4, , 16
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
	ld.h	$a3, $a1, 0
	st.w	$a3, $a0, 0
	addi.d	$a0, $a0, 4
	addi.d	$a2, $a2, -1
	addi.d	$a1, $a1, 2
	bnez	$a2, .LBB0_1
.LBB0_2:
	ret

after

# %bb.0:                                # %entry
	ori	$a3, $zero, 1
	blt	$a2, $a3, .LBB0_3
# %bb.1:                                # %for.body.preheader
	alsl.d	$a2, $a2, $a0, 2
	.p2align	4, , 16
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
	ld.h	$a3, $a1, 0
	st.w	$a3, $a0, 0
	addi.d	$a0, $a0, 4
	addi.d	$a1, $a1, 2
	bne	$a0, $a2, .LBB0_2
.LBB0_3:                                # %for.cond.cleanup
	ret

I see. Could you pre-commit a dedicated IR test case in llvm/test/CodeGen/LoongArch?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants